1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 package build.tools.generatecharacter;
27
28 import java.io.IOException;
29 import java.io.FileNotFoundException;
30 import java.io.BufferedReader;
31 import java.io.FileReader;
32 import java.io.PrintWriter;
33 import java.io.BufferedWriter;
34 import java.io.FileWriter;
35 import java.io.File;
36 import java.util.List;
37
38 import build.tools.generatecharacter.CharacterName;
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67 public class GenerateCharacter {
68
69 final static boolean DEBUG = false;
70
71 final static String commandMarker = "$$";
72 static String ROOT = "";
73 static String DefaultUnicodeSpecFileName = ROOT + "UnicodeData.txt";
74 static String DefaultSpecialCasingFileName = ROOT + "SpecialCasing.txt";
75 static String DefaultPropListFileName = ROOT + "PropList.txt";
76 static String DefaultJavaTemplateFileName = ROOT + "Character.java.template";
77 static String DefaultJavaOutputFileName = ROOT + "Character.java";
78 static String DefaultCTemplateFileName = ROOT + "Character.c.template";
79 static String DefaultCOutputFileName = ROOT + "Character.c";
80
81 static int plane = 0;
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169 private static final int
170 shiftType = 0, maskType = 0x001F,
171 shiftDigitOffset = 5, maskDigitOffset = 0x03E0,
172 shiftNumericType = 10, maskNumericType = 0x0C00,
173 shiftIdentifierInfo = 12, maskIdentifierInfo = 0x7000,
174 maskUnicodePart = 0x1000,
175 shiftCaseInfo = 15, maskCaseInfo = 0x38000,
176 maskLowerCase = 0x20000,
177 maskUpperCase = 0x10000,
178 maskTitleCase = 0x08000,
179 shiftCaseOffset = 18, maskCaseOffset = 0x07FC0000,
180 shiftCaseOffsetSign = 5,
181
182
183 maskDigit = 0x001F,
184
185 maskCase = 0x01FF,
186 shiftBidi = 27, maskBidi = 0x78000000,
187 shiftMirrored = 31,
188 shiftPlane = 16, maskPlane = 0xFF0000;
189
190
191 private static final long maskMirrored = 0x80000000L;
192
193
194
195 private static final long
196 maskOtherLowercase = 0x100000000L,
197 maskOtherUppercase = 0x200000000L,
198 maskOtherAlphabetic = 0x400000000L,
199 maskOtherMath = 0x800000000L,
200 maskIdeographic = 0x1000000000L,
201 maskNoncharacterCP = 0x2000000000L;
202
203
204
205 public static int
206 valueNotNumeric = 0x0000,
207 valueDigit = 0x0400,
208 valueStrangeNumeric = 0x0800,
209 valueJavaSupradecimal = 0x0C00,
210 valueIgnorable = 0x1000,
211 valueJavaOnlyPart = 0x2000,
212 valueJavaUnicodePart = 0x3000,
213 valueJavaWhitespace = 0x4000,
214 valueJavaStartUnicodePart = 0x5000,
215 valueJavaOnlyStart = 0x6000,
216 valueJavaUnicodeStart = 0x7000,
217 lowJavaStart = 0x5000,
218 nonzeroJavaPart = 0x3000,
219 valueUnicodeStart = 0x7000;
220
221
222
223 private static final int
224 bitJavaStart = 0x02,
225 bitJavaPart = 0x01,
226 maskIsJavaIdentifierPart = bitJavaPart,
227 maskIsJavaIdentifierStart = bitJavaStart;
228
229 static int maxOffset = maskCase/2 ;
230 static int minOffset = -maxOffset;
231
232
233
234
235
236
237
238
239
240
241 static String hex(long n) { return Long.toHexString(n).toUpperCase(); }
242
243 static String hex2(long n) {
244 String q = Long.toHexString(n & 0xFF).toUpperCase();
245 return "00".substring(Math.min(2, q.length())) + q;
246 }
247
248 static String hex4(long n) {
249 String q = Long.toHexString(n & 0xFFFF).toUpperCase();
250 return "0000".substring(Math.min(4, q.length())) + q;
251 }
252
253 static String hex8(long n) {
254 String q = Long.toHexString(n & 0xFFFFFFFFL).toUpperCase();
255 return "00000000".substring(Math.min(8, q.length())) + q;
256 }
257
258 static String hex16(long n) {
259 String q = Long.toHexString(n).toUpperCase();
260 return "0000000000000000".substring(Math.min(16, q.length())) + q;
261 }
262
263 static String dec3(long n) {
264 String q = Long.toString(n);
265 return " ".substring(Math.min(3, q.length())) + q;
266 }
267
268 static String dec5(long n) {
269 String q = Long.toString(n);
270 return " ".substring(Math.min(5, q.length())) + q;
271 }
272
273
274
275 static void FAIL(String s) {
276 System.out.println("** " + s);
277 }
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309 static long[] buildMap(UnicodeSpec[] data, SpecialCaseMap[] specialMaps, PropList propList)
310 {
311 long[] result;
312 if (bLatin1 == true) {
313 result = new long[256];
314 } else {
315 result = new long[1<<16];
316 }
317 int k=0;
318 int codePoint = plane<<16;
319 UnicodeSpec nonCharSpec = new UnicodeSpec();
320 for (int j = 0; j < data.length && k < result.length; j++) {
321 if (data[j].codePoint == codePoint) {
322 result[k] = buildOne(codePoint, data[j], specialMaps);
323 ++k;
324 ++codePoint;
325 }
326 else if(data[j].codePoint > codePoint) {
327 if (data[j].name.endsWith("Last>")) {
328
329 while (codePoint < data[j].codePoint && k < result.length) {
330 result[k] = buildOne(codePoint, data[j], specialMaps);
331 ++k;
332 ++codePoint;
333 }
334 }
335 else {
336
337 while (codePoint < data[j].codePoint && k < result.length) {
338 result[k] = buildOne(codePoint, nonCharSpec, specialMaps);
339 ++k;
340 ++codePoint;
341 }
342 }
343 k = data[j].codePoint & 0xFFFF;
344 codePoint = data[j].codePoint;
345 result[k] = buildOne(codePoint, data[j], specialMaps);
346 ++k;
347 ++codePoint;
348 }
349 else {
350 System.out.println("An error has occured during spec mapping.");
351 System.exit(0);
352 }
353 }
354
355
356 codePoint = (plane<<16) | k;
357 while (k < result.length) {
358 result[k] = buildOne(codePoint, nonCharSpec, specialMaps);
359 ++k;
360 ++codePoint;
361 }
362
363
364 addExProp(result, propList, "Other_Lowercase", maskOtherLowercase);
365 addExProp(result, propList, "Other_Uppercase", maskOtherUppercase);
366 addExProp(result, propList, "Other_Alphabetic", maskOtherAlphabetic);
367 addExProp(result, propList, "Ideographic", maskIdeographic);
368
369
370
371 return result;
372 }
373
374
375 static int maxOffsetSeen = 0;
376 static int minOffsetSeen = 0;
377
378
379
380
381
382
383 static boolean isInvalidJavaWhiteSpace(int c) {
384 int[] exceptions = {0x00A0, 0x2007, 0x202F, 0xFEFF};
385 boolean retValue = false;
386 for(int x=0;x<exceptions.length;x++) {
387 if(c == exceptions[x]) {
388 retValue = true;
389 break;
390 }
391 }
392 return retValue;
393
394 }
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412 static long buildOne(int c, UnicodeSpec us, SpecialCaseMap[] specialMaps) {
413 long resultA = 0;
414
415 resultA |= us.generalCategory;
416
417
418 NUMERIC: {
419 STRANGE: {
420 int val = 0;
421
422 if ((c >= 0x0041) && (c <= 0x005A)) {
423 val = c - 0x0041;
424 resultA |= valueJavaSupradecimal;
425
426 } else if ((c >= 0x0061) && (c <= 0x007A)) {
427 val = c - 0x0061;
428 resultA |= valueJavaSupradecimal;
429
430 } else if ((c >= 0xFF21) && (c <= 0xFF3A)) {
431 val = c - 0xFF21;
432 resultA |= valueJavaSupradecimal;
433
434 } else if ((c >= 0xFF41) && (c <= 0xFF5A)) {
435 val = c - 0xFF41;
436 resultA |= valueJavaSupradecimal;
437 } else if (us.isDecimalValue()) {
438 val = us.decimalValue;
439 resultA |= valueDigit;
440 } else if (us.isDigitValue()) {
441 val = us.digitValue;
442 resultA |= valueDigit;
443 } else {
444 if (us.numericValue.length() == 0) {
445 break NUMERIC;
446 } else {
447 try {
448 val = Integer.parseInt(us.numericValue);
449 if (val >= 32 || val < 0) break STRANGE;
450 if (c == 0x215F) break STRANGE;
451 } catch(NumberFormatException e) {
452 break STRANGE;
453 }
454 resultA |= valueDigit;
455 }
456 }
457 if (val >= 32 || val < 0) break STRANGE;
458 resultA |= ((val - c & maskDigit) << shiftDigitOffset);
459 break NUMERIC;
460 }
461 resultA |= valueStrangeNumeric;
462 }
463
464
465 int offset = 0;
466
467 int specialMap = SpecialCaseMap.find(c, specialCaseMaps);
468 boolean bHasUpper = (us.hasUpperMap()) || (specialMap != -1);
469 if (bHasUpper) {
470 resultA |= maskUpperCase;
471 }
472 if (specialMap != -1) {
473
474
475
476 offset = -1;
477 }
478 else if (us.hasUpperMap()) {
479 offset = c - us.upperMap;
480 }
481
482 if (us.hasLowerMap()) {
483 resultA |= maskLowerCase;
484 if (offset == 0)
485 offset = us.lowerMap - c;
486 else if (offset != (us.lowerMap - c)) {
487 if (DEBUG) {
488 FAIL("Character " + hex(c) +
489 " has incompatible lowercase and uppercase mappings");
490 }
491 }
492 }
493 if ((us.hasTitleMap() && us.titleMap != us.upperMap) ||
494 (bHasUpper && us.hasLowerMap())) {
495 resultA |= maskTitleCase;
496 }
497 if (bHasUpper && !us.hasLowerMap() && !us.hasTitleMap() && verbose) {
498 System.out.println("Warning: Character " + hex4(c) + " has upper but " +
499 "no title case; Java won't know this");
500 }
501 if (offset < minOffsetSeen) minOffsetSeen = offset;
502 if (offset > maxOffsetSeen) maxOffsetSeen = offset;
503 if (offset > maxOffset || offset < minOffset) {
504 if (DEBUG) {
505 FAIL("Case offset " + offset + " for character " + hex4(c) + " must be handled as a special case");
506 }
507 offset = maskCase;
508 }
509 resultA |= ((offset & maskCase) << shiftCaseOffset);
510
511
512 if (us.generalCategory == UnicodeSpec.LOWERCASE_LETTER
513 || us.generalCategory == UnicodeSpec.UPPERCASE_LETTER
514 || us.generalCategory == UnicodeSpec.TITLECASE_LETTER
515 || us.generalCategory == UnicodeSpec.MODIFIER_LETTER
516 || us.generalCategory == UnicodeSpec.OTHER_LETTER
517 || us.generalCategory == UnicodeSpec.LETTER_NUMBER) {
518 resultA |= valueJavaUnicodeStart;
519 }
520 else if (us.generalCategory == UnicodeSpec.COMBINING_SPACING_MARK
521 || us.generalCategory == UnicodeSpec.NON_SPACING_MARK
522 || us.generalCategory == UnicodeSpec.DECIMAL_DIGIT_NUMBER) {
523 resultA |= valueJavaUnicodePart;
524 }
525 else if (us.generalCategory == UnicodeSpec.CONNECTOR_PUNCTUATION) {
526 resultA |= valueJavaStartUnicodePart;
527 }
528 else if (us.generalCategory == UnicodeSpec.CURRENCY_SYMBOL) {
529 resultA |= valueJavaOnlyStart;
530 }
531 else if (((c >= 0x0000) && (c <= 0x0008))
532 || ((c >= 0x000E) && (c <= 0x001B))
533 || ((c >= 0x007F) && (c <= 0x009F))
534 || us.generalCategory == UnicodeSpec.FORMAT) {
535 resultA |= valueIgnorable;
536 }
537 else if (us.generalCategory == UnicodeSpec.SPACE_SEPARATOR
538 || us.generalCategory == UnicodeSpec.LINE_SEPARATOR
539 || us.generalCategory == UnicodeSpec.PARAGRAPH_SEPARATOR) {
540 if (!isInvalidJavaWhiteSpace(c)) resultA |= valueJavaWhitespace;
541 }
542 else if (((c >= 0x0009) && (c <= 0x000D))
543 || ((c >= 0x001C) && (c <= 0x001F))) {
544 resultA |= valueJavaWhitespace;
545 }
546
547
548 if (!nobidi) {
549 int tmpBidi =
550 (us.bidiCategory > UnicodeSpec.DIRECTIONALITY_OTHER_NEUTRALS ||
551 us.bidiCategory == -1) ? maskBidi : (us.bidiCategory << shiftBidi);
552 resultA |= tmpBidi;
553 }
554
555
556 if (!nomirror) {
557 resultA |= us.mirrored ? maskMirrored : 0;
558 }
559
560 if (identifiers) {
561 long replacement = 0;
562 if ((resultA & maskIdentifierInfo) >= lowJavaStart) {
563 replacement |= bitJavaStart;
564 }
565 if ( ((resultA & nonzeroJavaPart) != 0)
566 && ((resultA & maskIdentifierInfo) != valueIgnorable)) {
567 replacement |= bitJavaPart;
568 }
569 resultA = replacement;
570 }
571 return resultA;
572 }
573
574 static void addExProp(long[] map, PropList propList, String prop, long mask) {
575 List<Integer> cps = propList.codepoints(prop);
576 if (cps != null) {
577 for (Integer cp : cps) {
578 if (cp < map.length)
579 map[cp] |= mask;
580 }
581 }
582 }
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618 static long[][] buildTable(long[] map, int size) {
619 int n = map.length;
620 if (((n >> size) << size) != n) {
621 FAIL("Length " + n + " is not a multiple of " + (1 << size));
622 }
623 int m = 1 << size;
624
625 long[] newmap = new long[n >> size];
626
627
628 long[] buffer = new long[n];
629 int ptr = 0;
630 OUTER: for (int i = 0; i < n; i += m) {
631
632 MIDDLE: for (int j = 0; j < ptr; j += m) {
633
634 for (int k = 0; k < m; k++) {
635 if (buffer[j+k] != map[i+k])
636 continue MIDDLE;
637 }
638
639
640 newmap[i >> size] = (j >> size);
641 continue OUTER;
642 }
643
644
645 for (int k = 0; k < m; k++) {
646 buffer[ptr+k] = map[i+k];
647 }
648 newmap[i >> size] = (ptr >> size);
649 ptr += m;
650 }
651
652
653 long[] newdata = new long[ptr];
654 for (int j = 0; j < ptr; j++) {
655 newdata[j] = buffer[j];
656 }
657
658 long[][] result = { newmap, newdata };
659 return result;
660 }
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689 static void generateCharacterClass(String theTemplateFileName,
690 String theOutputFileName)
691 throws FileNotFoundException, IOException {
692 BufferedReader in = new BufferedReader(new FileReader(theTemplateFileName));
693 PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(theOutputFileName)));
694 out.println(commentStart +
695 " This file was generated AUTOMATICALLY from a template file " +
696 new java.util.Date() + commentEnd);
697 int marklen = commandMarker.length();
698 LOOP: while(true) {
699 try {
700 String line = in.readLine();
701 if (line == null) break LOOP;
702 int pos = 0;
703 int depth = 0;
704 while ((pos = line.indexOf(commandMarker, pos)) >= 0) {
705 int newpos = pos + marklen;
706 char ch = 'x';
707 SCAN: while (newpos < line.length() &&
708 (Character.isJavaIdentifierStart(ch = line.charAt(newpos))
709 || ch == '(' || (ch == ')' && depth > 0))) {
710 ++newpos;
711 if (ch == '(') {
712 ++depth;
713 }
714 else if (ch == ')') {
715 --depth;
716 if (depth == 0)
717 break SCAN;
718 }
719 }
720 String replacement = replaceCommand(line.substring(pos + marklen, newpos));
721 line = line.substring(0, pos) + replacement + line.substring(newpos);
722 pos += replacement.length();
723 }
724 out.println(line);
725 }
726 catch (IOException e) {
727 break LOOP;
728 }
729 }
730 in.close();
731 out.close();
732 }
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758 static String replaceCommand(String x) {
759 if (x.equals("Tables")) return genTables();
760 if (x.equals("Initializers")) return genInitializers();
761 if (x.length() >= 9 && x.substring(0, 7).equals("Lookup(") &&
762 x.substring(x.length()-1).equals(")") )
763 return genAccess("A", x.substring(7, x.length()-1), (identifiers ? 2 : 32));
764 if (x.length() >= 11 && x.substring(0, 9).equals("LookupEx(") &&
765 x.substring(x.length()-1).equals(")") )
766 return genAccess("B", x.substring(9, x.length()-1), 16);
767 if (x.equals("shiftType")) return Long.toString(shiftType);
768 if (x.equals("shiftIdentifierInfo")) return Long.toString(shiftIdentifierInfo);
769 if (x.equals("maskIdentifierInfo")) return "0x" + hex8(maskIdentifierInfo);
770 if (x.equals("maskUnicodePart")) return "0x" + hex8(maskUnicodePart);
771 if (x.equals("shiftCaseOffset")) return Long.toString(shiftCaseOffset);
772 if (x.equals("shiftCaseInfo")) return Long.toString(shiftCaseInfo);
773 if (x.equals("shiftCaseOffsetSign")) return Long.toString(shiftCaseOffsetSign);
774 if (x.equals("maskCase")) return "0x" + hex8(maskCase);
775 if (x.equals("maskCaseOffset")) return "0x" + hex8(maskCaseOffset);
776 if (x.equals("maskLowerCase")) return "0x" + hex8(maskLowerCase);
777 if (x.equals("maskUpperCase")) return "0x" + hex8(maskUpperCase);
778 if (x.equals("maskTitleCase")) return "0x" + hex8(maskTitleCase);
779 if (x.equals("maskOtherLowercase")) return "0x" + hex4(maskOtherLowercase >> 32);
780 if (x.equals("maskOtherUppercase")) return "0x" + hex4(maskOtherUppercase >> 32);
781 if (x.equals("maskOtherAlphabetic")) return "0x" + hex4(maskOtherAlphabetic >> 32);
782 if (x.equals("maskIdeographic")) return "0x" + hex4(maskIdeographic >> 32);
783 if (x.equals("valueIgnorable")) return "0x" + hex8(valueIgnorable);
784 if (x.equals("valueJavaUnicodeStart")) return "0x" + hex8(valueJavaUnicodeStart);
785 if (x.equals("valueJavaOnlyStart")) return "0x" + hex8(valueJavaOnlyStart);
786 if (x.equals("valueJavaUnicodePart")) return "0x" + hex8(valueJavaUnicodePart);
787 if (x.equals("valueJavaOnlyPart")) return "0x" + hex8(valueJavaOnlyPart);
788 if (x.equals("valueJavaWhitespace")) return "0x" + hex8(valueJavaWhitespace);
789 if (x.equals("lowJavaStart")) return "0x" + hex8(lowJavaStart);
790 if (x.equals("nonzeroJavaPart")) return "0x" + hex8(nonzeroJavaPart);
791 if (x.equals("bitJavaStart")) return "0x" + hex8(bitJavaStart);
792 if (x.equals("bitJavaPart")) return Long.toString(bitJavaPart);
793 if (x.equals("valueUnicodeStart")) return "0x" + hex8(valueUnicodeStart);
794 if (x.equals("maskIsJavaIdentifierStart")) return "0x" + hex(maskIsJavaIdentifierStart);
795 if (x.equals("maskIsJavaIdentifierPart")) return "0x" + hex(maskIsJavaIdentifierPart);
796 if (x.equals("shiftDigitOffset")) return Long.toString(shiftDigitOffset);
797 if (x.equals("maskDigitOffset")) return "0x" + hex(maskDigitOffset);
798 if (x.equals("maskDigit")) return "0x" + hex(maskDigit);
799 if (x.equals("shiftNumericType")) return Long.toString(shiftNumericType);
800 if (x.equals("maskNumericType")) return "0x" + hex(maskNumericType);
801 if (x.equals("valueNotNumeric")) return "0x" + hex8(valueNotNumeric);
802 if (x.equals("valueDigit")) return "0x" + hex8(valueDigit);
803 if (x.equals("valueStrangeNumeric")) return "0x" + hex8(valueStrangeNumeric);
804 if (x.equals("valueJavaSupradecimal")) return "0x" + hex8(valueJavaSupradecimal);
805 if (x.equals("valueDigit")) return "0x" + hex8(valueDigit);
806 if (x.equals("valueStrangeNumeric")) return "0x" + hex8(valueStrangeNumeric);
807 if (x.equals("maskType")) return "0x" + hex(maskType);
808 if (x.equals("shiftBidi")) return Long.toString(shiftBidi);
809 if (x.equals("maskBidi")) return "0x" + hex(maskBidi);
810 if (x.equals("maskMirrored")) return "0x" + hex8(maskMirrored);
811 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.UNASSIGNED][UnicodeSpec.LONG]))
812 return Integer.toString(UnicodeSpec.UNASSIGNED);
813 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.UPPERCASE_LETTER][UnicodeSpec.LONG]))
814 return Integer.toString(UnicodeSpec.UPPERCASE_LETTER);
815 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.LOWERCASE_LETTER][UnicodeSpec.LONG]))
816 return Integer.toString(UnicodeSpec.LOWERCASE_LETTER);
817 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.TITLECASE_LETTER][UnicodeSpec.LONG]))
818 return Integer.toString(UnicodeSpec.TITLECASE_LETTER);
819 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.MODIFIER_LETTER][UnicodeSpec.LONG]))
820 return Integer.toString(UnicodeSpec.MODIFIER_LETTER);
821 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.OTHER_LETTER][UnicodeSpec.LONG]))
822 return Integer.toString(UnicodeSpec.OTHER_LETTER);
823 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.NON_SPACING_MARK][UnicodeSpec.LONG]))
824 return Integer.toString(UnicodeSpec.NON_SPACING_MARK);
825 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.ENCLOSING_MARK][UnicodeSpec.LONG]))
826 return Integer.toString(UnicodeSpec.ENCLOSING_MARK);
827 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.COMBINING_SPACING_MARK][UnicodeSpec.LONG]))
828 return Integer.toString(UnicodeSpec.COMBINING_SPACING_MARK);
829 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.DECIMAL_DIGIT_NUMBER][UnicodeSpec.LONG]))
830 return Integer.toString(UnicodeSpec.DECIMAL_DIGIT_NUMBER);
831 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.OTHER_NUMBER][UnicodeSpec.LONG]))
832 return Integer.toString(UnicodeSpec.OTHER_NUMBER);
833 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.SPACE_SEPARATOR][UnicodeSpec.LONG]))
834 return Integer.toString(UnicodeSpec.SPACE_SEPARATOR);
835 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.LINE_SEPARATOR][UnicodeSpec.LONG]))
836 return Integer.toString(UnicodeSpec.LINE_SEPARATOR);
837 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.PARAGRAPH_SEPARATOR][UnicodeSpec.LONG]))
838 return Integer.toString(UnicodeSpec.PARAGRAPH_SEPARATOR);
839 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.CONTROL][UnicodeSpec.LONG]))
840 return Integer.toString(UnicodeSpec.CONTROL);
841 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.FORMAT][UnicodeSpec.LONG]))
842 return Integer.toString(UnicodeSpec.FORMAT);
843 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.PRIVATE_USE][UnicodeSpec.LONG]))
844 return Integer.toString(UnicodeSpec.PRIVATE_USE);
845 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.SURROGATE][UnicodeSpec.LONG]))
846 return Integer.toString(UnicodeSpec.SURROGATE);
847 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.DASH_PUNCTUATION][UnicodeSpec.LONG]))
848 return Integer.toString(UnicodeSpec.DASH_PUNCTUATION);
849 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.START_PUNCTUATION][UnicodeSpec.LONG]))
850 return Integer.toString(UnicodeSpec.START_PUNCTUATION);
851 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.END_PUNCTUATION][UnicodeSpec.LONG]))
852 return Integer.toString(UnicodeSpec.END_PUNCTUATION);
853 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.INITIAL_QUOTE_PUNCTUATION][UnicodeSpec.LONG]))
854 return Integer.toString(UnicodeSpec.INITIAL_QUOTE_PUNCTUATION);
855 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.FINAL_QUOTE_PUNCTUATION][UnicodeSpec.LONG]))
856 return Integer.toString(UnicodeSpec.FINAL_QUOTE_PUNCTUATION);
857 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.CONNECTOR_PUNCTUATION][UnicodeSpec.LONG]))
858 return Integer.toString(UnicodeSpec.CONNECTOR_PUNCTUATION);
859 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.OTHER_PUNCTUATION][UnicodeSpec.LONG]))
860 return Integer.toString(UnicodeSpec.OTHER_PUNCTUATION);
861 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.LETTER_NUMBER][UnicodeSpec.LONG]))
862 return Integer.toString(UnicodeSpec.LETTER_NUMBER);
863 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.MATH_SYMBOL][UnicodeSpec.LONG]))
864 return Integer.toString(UnicodeSpec.MATH_SYMBOL);
865 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.CURRENCY_SYMBOL][UnicodeSpec.LONG]))
866 return Integer.toString(UnicodeSpec.CURRENCY_SYMBOL);
867 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.MODIFIER_SYMBOL][UnicodeSpec.LONG]))
868 return Integer.toString(UnicodeSpec.MODIFIER_SYMBOL);
869 if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.OTHER_SYMBOL][UnicodeSpec.LONG]))
870 return Integer.toString(UnicodeSpec.OTHER_SYMBOL);
871 if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_LEFT_TO_RIGHT][UnicodeSpec.LONG]))
872 return Integer.toString(UnicodeSpec.DIRECTIONALITY_LEFT_TO_RIGHT);
873 if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING][UnicodeSpec.LONG]))
874 return Integer.toString(UnicodeSpec.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING);
875 if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE][UnicodeSpec.LONG]))
876 return Integer.toString(UnicodeSpec.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE);
877 if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_RIGHT_TO_LEFT][UnicodeSpec.LONG]))
878 return Integer.toString(UnicodeSpec.DIRECTIONALITY_RIGHT_TO_LEFT);
879 if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC][UnicodeSpec.LONG]))
880 return Integer.toString(UnicodeSpec.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC);
881 if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING][UnicodeSpec.LONG]))
882 return Integer.toString(UnicodeSpec.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING);
883 if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE][UnicodeSpec.LONG]))
884 return Integer.toString(UnicodeSpec.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE);
885 if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT][UnicodeSpec.LONG]))
886 return Integer.toString(UnicodeSpec.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT);
887 if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_EUROPEAN_NUMBER][UnicodeSpec.LONG]))
888 return Integer.toString(UnicodeSpec.DIRECTIONALITY_EUROPEAN_NUMBER);
889 if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR][UnicodeSpec.LONG]))
890 return Integer.toString(UnicodeSpec.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR);
891 if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR][UnicodeSpec.LONG]))
892 return Integer.toString(UnicodeSpec.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR);
893 if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_ARABIC_NUMBER][UnicodeSpec.LONG]))
894 return Integer.toString(UnicodeSpec.DIRECTIONALITY_ARABIC_NUMBER);
895 if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR][UnicodeSpec.LONG]))
896 return Integer.toString(UnicodeSpec.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR);
897 if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_NONSPACING_MARK][UnicodeSpec.LONG]))
898 return Integer.toString(UnicodeSpec.DIRECTIONALITY_NONSPACING_MARK);
899 if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_BOUNDARY_NEUTRAL][UnicodeSpec.LONG]))
900 return Integer.toString(UnicodeSpec.DIRECTIONALITY_BOUNDARY_NEUTRAL);
901 if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_PARAGRAPH_SEPARATOR][UnicodeSpec.LONG]))
902 return Integer.toString(UnicodeSpec.DIRECTIONALITY_PARAGRAPH_SEPARATOR);
903 if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_SEGMENT_SEPARATOR][UnicodeSpec.LONG]))
904 return Integer.toString(UnicodeSpec.DIRECTIONALITY_SEGMENT_SEPARATOR);
905 if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_WHITESPACE][UnicodeSpec.LONG]))
906 return Integer.toString(UnicodeSpec.DIRECTIONALITY_WHITESPACE);
907 if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_OTHER_NEUTRALS][UnicodeSpec.LONG]))
908 return Integer.toString(UnicodeSpec.DIRECTIONALITY_OTHER_NEUTRALS);
909 FAIL("Unknown text substitution marker " + commandMarker + x);
910 return commandMarker + x;
911 }
912
913
914
915
916
917
918
919
920
921
922
923
924 static String genTables() {
925 int n = sizes.length;
926 StringBuffer result = new StringBuffer();
927
928 result.append(commentStart + " The following tables and code generated using:" +
929 commentEnd + "\n ");
930 result.append(commentStart + ' ' + commandLineDescription + commentEnd + "\n ");
931
932 if (plane == 0 && bLatin1 == false) {
933 genCaseMapTableDeclaration(result);
934 genCaseMapTable(initializers, specialCaseMaps);
935 }
936 int totalBytes = 0;
937 for (int k = 0; k < n - 1; k++) {
938 genTable(result, tableNames[k], tables[k], 0, bytes[k]<<3, sizes[k], preshifted[k],
939 sizes[k+1], false, false, k==0);
940 int s = bytes[k];
941 if (s == 1 && useCharForByte) {
942 s = 2;
943 }
944 totalBytes += tables[k].length * s;
945 }
946 genTable(result, "A", tables[n - 1], 0, (identifiers ? 2 : 32),
947 sizes[n - 1], false, 0, true, !(identifiers), false);
948
949
950
951 genTable(result, "B", tables[n - 1], 32, 16, sizes[n - 1], false, 0, true, true, false);
952
953 totalBytes += ((((tables[n - 1].length * (identifiers ? 2 : 32)) + 31) >> 5) << 2);
954 result.append(commentStart);
955 result.append(" In all, the character property tables require ");
956 result.append(totalBytes).append(" bytes.").append(commentEnd);
957 if (verbose) {
958 System.out.println("The character property tables require "
959 + totalBytes + " bytes.");
960 }
961 return result.toString();
962 }
963
964
965
966
967
968
969 static String genInitializers() {
970 return initializers.toString();
971 }
972
973
974
975
976
977 static int getTotalBytes() {
978 int n = sizes.length;
979 int totalBytes = 0;
980 for (int k = 0; k < n - 1; k++) {
981 totalBytes += tables[k].length * bytes[k];
982 }
983 totalBytes += ((((tables[n - 1].length * (identifiers ? 2 : 32))
984 + 31) >> 5) << 2);
985 return totalBytes;
986 }
987
988 static void appendEscapedStringFragment(StringBuffer result,
989 char[] line,
990 int length,
991 boolean lastFragment) {
992 result.append(" \"");
993 for (int k=0; k<length; ++k) {
994 result.append("\\u");
995 result.append(hex4(line[k]));
996 }
997 result.append("\"");
998 result.append(lastFragment ? ";" : "+");
999 result.append("\n");
1000 }
1001
// Code templates expanded by addInitializer. Tokens introduced by "$$"
// (name, type, bits, size, entriesPerChar, charsPerEntry) are replaced there;
// a token ends at '_' so "$$name_DATA" becomes the table name plus "_DATA".

/**
 * Template selected by addInitializer when entriesPerChar is greater than 1:
 * every char of the data string yields $$entriesPerChar entries, taken from
 * the low bits upward, $$bits bits at a time.
 */
static String SMALL_INITIALIZER =
" { // THIS CODE WAS AUTOMATICALLY CREATED BY GenerateCharacter:\n"+

" int len = $$name_DATA.length();\n"+
" int j=0;\n"+
" for (int i=0; i<len; ++i) {\n"+
" int c = $$name_DATA.charAt(i);\n"+
" for (int k=0; k<$$entriesPerChar; ++k) {\n"+
" $$name[j++] = ($$type)c;\n"+
" c >>= $$bits;\n"+
" }\n"+
" }\n"+
" assert (j == $$size);\n"+
" }\n";

/**
 * Template selected by addInitializer when entriesPerChar is 1: every char
 * of the data string becomes one entry.
 */
static String SAME_SIZE_INITIALIZER =
" { // THIS CODE WAS AUTOMATICALLY CREATED BY GenerateCharacter:\n"+
" assert ($$name_DATA.length() == $$size);\n"+

" for (int i=0; i<$$size; ++i)\n"+
" $$name[i] = ($$type)$$name_DATA.charAt(i);\n"+
" }\n";

/**
 * Template selected by addInitializer when entriesPerChar is zero or
 * negative (other than -2): $$charsPerEntry consecutive chars are combined,
 * first char most significant, into one entry.
 */
static String BIG_INITIALIZER =
" { // THIS CODE WAS AUTOMATICALLY CREATED BY GenerateCharacter:\n"+

" int len = $$name_DATA.length();\n"+
" int j=0;\n"+
" int charsInEntry=0;\n"+
" $$type entry=0;\n"+
" for (int i=0; i<len; ++i) {\n"+
" entry |= $$name_DATA.charAt(i);\n"+
" if (++charsInEntry == $$charsPerEntry) {\n"+
" $$name[j++] = entry;\n"+
" entry = 0;\n"+
" charsInEntry = 0;\n"+
" }\n"+
" else {\n"+
" entry <<= 16;\n"+
" }\n"+
" }\n"+
" assert (j == $$size);\n"+
" }\n";

/**
 * Template selected by addInitializer when entriesPerChar is -2: each pair
 * of chars forms one int entry, first char in the upper 16 bits.
 */
static String INT32_INITIALIZER =
" { // THIS CODE WAS AUTOMATICALLY CREATED BY GenerateCharacter:\n"+
" char[] data = $$name_DATA.toCharArray();\n"+
" assert (data.length == ($$size * 2));\n"+
" int i = 0, j = 0;\n"+
" while (i < ($$size * 2)) {\n"+
" int entry = data[i++] << 16;\n"+
" $$name[j++] = entry | data[i++];\n"+
" }\n"+
" }\n";
1056
1057 static void addInitializer(String name, String type, int entriesPerChar,
1058 int bits, int size) {
1059
1060 String template = (entriesPerChar == 1) ? SAME_SIZE_INITIALIZER :
1061 ((entriesPerChar > 0) ? SMALL_INITIALIZER : BIG_INITIALIZER);
1062 if (entriesPerChar == -2) {
1063 template = INT32_INITIALIZER;
1064 }
1065 int marklen = commandMarker.length();
1066 int pos = 0;
1067 while ((pos = template.indexOf(commandMarker, pos)) >= 0) {
1068 int newpos = pos + marklen;
1069 char ch = 'x';
1070 while (newpos < template.length() &&
1071 Character.isJavaIdentifierStart(ch = template.charAt(newpos)) &&
1072 ch != '_')
1073 ++newpos;
1074 String token = template.substring(pos+marklen, newpos);
1075 String replacement = "ERROR";
1076
1077 if (token.equals("name")) replacement = name;
1078 else if (token.equals("type")) replacement = type;
1079 else if (token.equals("bits")) replacement = ""+bits;
1080 else if (token.equals("size")) replacement = ""+size;
1081 else if (token.equals("entriesPerChar")) replacement = ""+entriesPerChar;
1082 else if (token.equals("charsPerEntry")) replacement = ""+(-entriesPerChar);
1083 else FAIL("Unrecognized token: " + token);
1084
1085 template = template.substring(0, pos) + replacement + template.substring(newpos);
1086 pos += replacement.length();
1087 }
1088 initializers.append(template);
1089 }
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
/**
 * Appends to {@code result} the source text that declares one lookup table,
 * either as an array literal or (when tableAsString is set) as a string
 * constant, registering an initializer via addInitializer when the string
 * must be unpacked into an array of another type.
 *
 * @param result      buffer receiving the generated source text
 * @param name        name of the generated table
 * @param table       the table contents
 * @param extract     right shift applied to every table element before use
 * @param bits        width in bits of one emitted entry; 1, 2, 4, 8, 16 and
 *                    32 are handled explicitly, anything else is treated as
 *                    a 64-bit entry
 * @param size        log2 of the group size used when laying out lines and
 *                    line comments in the array-literal form
 * @param preshifted  together with a non-zero {@code shift}, requests that
 *                    entries carry a left shift
 * @param shift       the shift amount used when {@code preshifted} is true
 * @param hexFormat   emit entries in hexadecimal rather than decimal
 * @param properties  emit one entry per line followed by a
 *                    propertiesComments description
 * @param hexComment  emit the line comment index in hexadecimal
 */
static void genTable(StringBuffer result, String name,
                     long[] table, int extract, int bits, int size,
                     boolean preshifted, int shift, boolean hexFormat,
                     boolean properties, boolean hexComment) {

    // Element type of the generated array; sub-byte entries are packed
    // into 32-bit words.
    String atype = bits == 1 ? (Csyntax ? "unsigned long" : "int") :
        bits == 2 ? (Csyntax ? "unsigned long" : "int") :
        bits == 4 ? (Csyntax ? "unsigned long" : "int") :
        bits == 8 ? (Csyntax ? "unsigned char" : "byte") :
        bits == 16 ? (Csyntax ? "unsigned short" : "char") :
        bits == 32 ? (Csyntax ? "unsigned long" : "int") :
        (Csyntax ? "int64" : "long");
    // Largest value that can be written as a positive literal of that type
    // in Java output.
    long maxPosEntry = bits == 1 ? Integer.MAX_VALUE :
        bits == 2 ? Integer.MAX_VALUE :
        bits == 4 ? Integer.MAX_VALUE :
        bits == 8 ? Byte.MAX_VALUE :
        bits == 16 ? Short.MAX_VALUE :
        bits == 32 ? Integer.MAX_VALUE :
        Long.MAX_VALUE;
    // Positive: number of entries packed into one 16-bit char of the string
    // form.  Negative: minus the number of chars needed for one entry.
    int entriesPerChar = bits <= 16 ? (16 / bits) : -(bits / 16);
    boolean shiftEntries = preshifted && shift != 0;
    if (bits == 8 && tableAsString && useCharForByte) {
        // Store each byte-sized entry in its own char so that the string
        // can be used directly as a char array.
        atype = "char";
        maxPosEntry = Character.MAX_VALUE;
        entriesPerChar = 1;
    }
    // A char table needs no unpacking code: the string's char array is the table.
    boolean noConversion = atype.equals("char");

    // Header comment: entry count and size in bytes, rounded up to a
    // multiple of four.
    result.append(commentStart);
    result.append(" The ").append(name).append(" table has ").append(table.length);
    result.append(" entries for a total of ");
    int sizeOfTable = ((table.length * bits + 31) >> 5) << 2;
    if (bits == 8 && useCharForByte) {
        sizeOfTable *= 2;
    }
    result.append(sizeOfTable);
    result.append(" bytes.").append(commentEnd).append("\n\n");
    if (Csyntax)
        result.append(" static ");
    else
        result.append(" static final ");
    result.append(atype);
    result.append(" ").append(name).append("[");
    // C output states the array length explicitly, counted in packed words.
    if (Csyntax)
        result.append(table.length >> (bits == 1 ? 5 : bits == 2 ? 4 : bits == 4 ? 3 : 0));
    if (tableAsString) {
        if (noConversion) {
            result.append("] = (\n");
        } else {
            // Declare the array plus a <name>_DATA string that the
            // generated initializer unpacks into it.
            result.append("] = new ").append(atype).append("["+table.length+"];\n ");
            result.append("static final String ").append(name).append("_DATA =\n");
        }
        int CHARS_PER_LINE = 8;
        StringBuffer theString = new StringBuffer();
        int entriesInCharSoFar = 0;
        char ch = '\u0000';
        int charsPerEntry = -entriesPerChar;
        for (int j=0; j<table.length; ++j) {

            long entry;
            // Table "A" is masked to its low 32 bits before extraction;
            // all other tables are shifted as-is.
            if ("A".equals(name))
                entry = (table[j] & 0xffffffffL) >> extract;
            else
                entry = (table[j] >> extract);
            if (shiftEntries) entry <<= shift;
            if (entry >= (1L << bits)) {
                FAIL("Entry too big");
            }
            if (entriesPerChar > 0) {
                // Pack several entries into one char: earlier entries end
                // up in the lower bits, the newest in the highest field.
                ch = (char)(((int)ch >> bits) | (entry << (entriesPerChar-1)*bits));
                ++entriesInCharSoFar;
                if (entriesInCharSoFar == entriesPerChar) {
                    // The char is full; emit it and start a new one.
                    theString.append(ch);
                    entriesInCharSoFar = 0;
                    ch = '\u0000';
                }
            }
            else {
                // Spread one wide entry over several chars, most
                // significant 16 bits first.
                for (int k=0; k<charsPerEntry; ++k) {
                    ch = (char)(entry >> ((charsPerEntry-1)*16));
                    entry <<= 16;
                    theString.append(ch);
                }
            }
        }
        if (entriesInCharSoFar > 0) {
            // Flush a partially filled char, shifting the entries down as if
            // the missing ones had been zero.
            while (entriesInCharSoFar < entriesPerChar) {
                ch = (char)((int)ch >> bits);
                ++entriesInCharSoFar;
            }
            theString.append(ch);
            entriesInCharSoFar = 0;
        }
        result.append(Utility.formatForSource(theString.toString(), " "));
        if (noConversion) {
            result.append(").toCharArray()");
        }
        result.append(";\n\n ");

        if (!noConversion) {
            addInitializer(name, atype, entriesPerChar, bits, table.length);
        }
    }
    else {
        result.append("] = {");
        // Shifted entries narrower than 32 bits are wrapped in a cast to
        // the element type.
        boolean castEntries = shiftEntries && (bits < 32);
        int printPerLine = hexFormat ? (bits == 1 ? 32*4 :
                                        bits == 2 ? 16*4 :
                                        bits == 4 ? 8*4 :
                                        bits == 8 ? 8 :
                                        bits == 16 ? 8 :
                                        bits == 32 ? 4 : 2) :
            (bits == 8 ? 8 :
             bits == 16 ? 8 : 4);
        // A new line starts whenever (j & printMask) == 0; with
        // properties set the mask is 0, i.e. one entry per line.
        int printMask = properties ? 0 :
            Math.min(1 << size,
                     printPerLine >> (castEntries ? (Csyntax ? 2 : 1) : 0)) - 1;
        int commentShift = ((1 << size) == table.length) ? 0 : size;
        int commentMask = ((1 << size) == table.length) ? printMask : (1 << size) - 1;
        long val = 0;
        for (int j = 0; j < table.length; j++) {
            if ((j & printMask) == 0) {
                // Trim trailing blanks of the previous line before breaking.
                while (result.charAt(result.length() - 1) == ' ')
                    result.setLength(result.length() - 1);
                result.append("\n ");
            }
            // "break PRINT" skips all output for this element; it is used
            // while sub-byte entries are still being accumulated in val.
            PRINT: {
                // NOTE(review): this prefix is appended before the
                // accumulation check below, so with castEntries and
                // bits < 8 it would be written for every sub-entry - confirm
                // that combination never occurs.
                if (castEntries)
                    result.append("(").append(atype).append(")(");
                long entry = table[j] >> extract;
                // Index mask of an entry within its packed word (31, 15 or 7
                // for 1-, 2- or 4-bit entries).
                int packMask = ((1 << (bits == 1 ? 5 : bits == 2 ? 4 : bits == 4 ? 3 : 2)) - 1);
                int k = j & packMask;
                if (bits >= 8)
                    val = entry;
                else if (k == 0) {
                    // First entry of a packed word: start accumulating.
                    val = entry;
                    break PRINT;
                }
                else {
                    val |= (entry << (k*bits));
                    // Only print once the word is complete.
                    if (k != packMask)
                        break PRINT;
                }
                if (val > maxPosEntry && !Csyntax) {
                    // Java output: a value above the signed maximum is
                    // written as a negative literal; the expression below
                    // yields the magnitude of that two's-complement value
                    // (e.g. 200 as an 8-bit entry becomes -56).
                    result.append('-');
                    val = maxPosEntry + maxPosEntry + 2 - val;
                }
                if (hexFormat) {
                    result.append("0x");
                    if (bits == 8)
                        result.append(hex2((byte)val));
                    else if (bits == 16)
                        result.append(hex4((short)val));
                    else if (bits == 32 || bits < 8)
                        result.append(hex8((int)val));
                    else {
                        result.append(hex16((long)val));
                        if (!Csyntax)
                            result.append("L");
                    }
                }
                else {
                    if (bits == 8)
                        result.append(dec3(val));
                    else if (bits == 64) {
                        result.append(dec5(val));
                        if (!Csyntax)
                            result.append("L");
                    }
                    else
                        result.append(dec5(val));
                }
                if (shiftEntries)
                    result.append("<<").append(shift);
                if (castEntries) result.append(")");
                if (j < (table.length - 1))
                    result.append(", ");
                else
                    result.append(" ");
                if ((j & printMask) == printMask) {
                    // End of a printed line: add a comment giving the
                    // index of the group just written.
                    result.append(" ").append(commentStart).append(" ");
                    if (hexComment)
                        result.append("0x").append(hex4((j & ~commentMask) << (16 - size)));
                    else
                        result.append(dec3((j & ~commentMask) >> commentShift));
                    if (properties) propertiesComments(result, val);
                    result.append(commentEnd);
                }
            }
        }
        result.append("\n };\n\n ");
    }
}
1330
1331 static void genCaseMapTableDeclaration(StringBuffer result) {
1332 String myTab = " ";
1333 result.append(myTab + "static final char[][][] charMap;\n");
1334 }
1335
1336 static void genCaseMapTable(StringBuffer result, SpecialCaseMap[] specialCaseMaps){
1337 String myTab = " ";
1338 int ch;
1339 char[] map;
1340 result.append(myTab + "charMap = new char[][][] {\n");
1341 for (int x = 0; x < specialCaseMaps.length; x++) {
1342 ch = specialCaseMaps[x].getCharSource();
1343 map = specialCaseMaps[x].getUpperCaseMap();
1344 result.append(myTab + myTab);
1345 result.append("{ ");
1346 result.append("{\'\\u"+hex4(ch)+"\'}, {");
1347 for (int y = 0; y < map.length; y++) {
1348 result.append("\'\\u"+hex4(map[y])+"\', ");
1349 }
1350 result.append("} },\n");
1351 }
1352 result.append(myTab + "};\n");
1353
1354 }
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367 static void propertiesComments(StringBuffer result, long val) {
1368 result.append(" ");
1369 switch ((int)(val & maskType)) {
1370 case UnicodeSpec.CONTROL:
1371 result.append("Cc");
1372 break;
1373 case UnicodeSpec.FORMAT:
1374 result.append("Cf");
1375 break;
1376 case UnicodeSpec.PRIVATE_USE:
1377 result.append("Co");
1378 break;
1379 case UnicodeSpec.SURROGATE:
1380 result.append("Cs");
1381 break;
1382 case UnicodeSpec.LOWERCASE_LETTER:
1383 result.append("Ll");
1384 break;
1385 case UnicodeSpec.MODIFIER_LETTER:
1386 result.append("Lm");
1387 break;
1388 case UnicodeSpec.OTHER_LETTER:
1389 result.append("Lo");
1390 break;
1391 case UnicodeSpec.TITLECASE_LETTER:
1392 result.append("Lt");
1393 break;
1394 case UnicodeSpec.UPPERCASE_LETTER:
1395 result.append("Lu");
1396 break;
1397 case UnicodeSpec.COMBINING_SPACING_MARK:
1398 result.append("Mc");
1399 break;
1400 case UnicodeSpec.ENCLOSING_MARK:
1401 result.append("Me");
1402 break;
1403 case UnicodeSpec.NON_SPACING_MARK:
1404 result.append("Mn");
1405 break;
1406 case UnicodeSpec.DECIMAL_DIGIT_NUMBER:
1407 result.append("Nd");
1408 break;
1409 case UnicodeSpec.LETTER_NUMBER:
1410 result.append("Nl");
1411 break;
1412 case UnicodeSpec.OTHER_NUMBER:
1413 result.append("No");
1414 break;
1415 case UnicodeSpec.CONNECTOR_PUNCTUATION:
1416 result.append("Pc");
1417 break;
1418 case UnicodeSpec.DASH_PUNCTUATION:
1419 result.append("Pd");
1420 break;
1421 case UnicodeSpec.END_PUNCTUATION:
1422 result.append("Pe");
1423 break;
1424 case UnicodeSpec.OTHER_PUNCTUATION:
1425 result.append("Po");
1426 break;
1427 case UnicodeSpec.START_PUNCTUATION:
1428 result.append("Ps");
1429 break;
1430 case UnicodeSpec.CURRENCY_SYMBOL:
1431 result.append("Sc");
1432 break;
1433 case UnicodeSpec.MODIFIER_SYMBOL:
1434 result.append("Sk");
1435 break;
1436 case UnicodeSpec.MATH_SYMBOL:
1437 result.append("Sm");
1438 break;
1439 case UnicodeSpec.OTHER_SYMBOL:
1440 result.append("So");
1441 break;
1442 case UnicodeSpec.LINE_SEPARATOR:
1443 result.append("Zl"); break;
1444 case UnicodeSpec.PARAGRAPH_SEPARATOR:
1445 result.append("Zp");
1446 break;
1447 case UnicodeSpec.SPACE_SEPARATOR:
1448 result.append("Zs");
1449 break;
1450 case UnicodeSpec.UNASSIGNED:
1451 result.append("unassigned");
1452 break;
1453 }
1454
1455 switch ((int)((val & maskBidi) >> shiftBidi)) {
1456 case UnicodeSpec.DIRECTIONALITY_LEFT_TO_RIGHT:
1457 result.append(", L");
1458 break;
1459 case UnicodeSpec.DIRECTIONALITY_RIGHT_TO_LEFT:
1460 result.append(", R");
1461 break;
1462 case UnicodeSpec.DIRECTIONALITY_EUROPEAN_NUMBER:
1463 result.append(", EN");
1464 break;
1465 case UnicodeSpec.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR:
1466 result.append(", ES");
1467 break;
1468 case UnicodeSpec.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR:
1469 result.append(", ET");
1470 break;
1471 case UnicodeSpec.DIRECTIONALITY_ARABIC_NUMBER:
1472 result.append(", AN");
1473 break;
1474 case UnicodeSpec.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR:
1475 result.append(", CS");
1476 break;
1477 case UnicodeSpec.DIRECTIONALITY_PARAGRAPH_SEPARATOR:
1478 result.append(", B");
1479 break;
1480 case UnicodeSpec.DIRECTIONALITY_SEGMENT_SEPARATOR:
1481 result.append(", S");
1482 break;
1483 case UnicodeSpec.DIRECTIONALITY_WHITESPACE:
1484 result.append(", WS");
1485 break;
1486 case UnicodeSpec.DIRECTIONALITY_OTHER_NEUTRALS:
1487 result.append(", ON");
1488 break;
1489 }
1490 if ((val & maskUpperCase) != 0) {
1491 result.append(", hasUpper (subtract ");
1492 result.append((val & maskCaseOffset) >> shiftCaseOffset).append(")");
1493 }
1494 if ((val & maskLowerCase) != 0) {
1495 result.append(", hasLower (add ");
1496 result.append((val & maskCaseOffset) >> shiftCaseOffset).append(")");
1497 }
1498 if ((val & maskTitleCase) != 0) {
1499 result.append(", hasTitle");
1500 }
1501 if ((val & maskIdentifierInfo) == valueIgnorable) {
1502 result.append(", ignorable");
1503 }
1504 if ((val & maskIdentifierInfo) == valueJavaUnicodePart) {
1505 result.append(", identifier part");
1506 }
1507 if ((val & maskIdentifierInfo) == valueJavaStartUnicodePart) {
1508 result.append(", underscore");
1509 }
1510 if ((val & maskIdentifierInfo) == valueJavaWhitespace) {
1511 result.append(", whitespace");
1512 }
1513 if ((val & maskIdentifierInfo) == valueJavaOnlyStart) {
1514 result.append(", currency");
1515 }
1516 if ((val & maskIdentifierInfo) == valueJavaUnicodeStart) {
1517 result.append(", identifier start");
1518 }
1519 if ((val & maskNumericType) == valueDigit) {
1520 result.append(", decimal ");
1521 result.append((val & maskDigitOffset) >> shiftDigitOffset);
1522 }
1523 if ((val & maskNumericType) == valueStrangeNumeric) {
1524 result.append(", strange");
1525 }
1526 if ((val & maskNumericType) == valueJavaSupradecimal) {
1527 result.append(", supradecimal ");
1528 result.append((val & maskDigitOffset) >> shiftDigitOffset);
1529 }
1530 }
1531
1532 static String[] tableNames = { "X", "Y", "Z", "P", "Q", "R", "S", "T", "U", "V", "W" };
1533
1534 static String tableName(int j) { return tableNames[j]; }
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560 static String genAccess(String tbl, String var, int bits) {
1561 String access = null;
1562 int bitoffset = bits == 1 ? 5 : bits == 2 ? 4 : bits == 4 ? 3 : 0;
1563 for (int k = 0; k < sizes.length; k++) {
1564 int offset = ((k < sizes.length - 1) ? 0 : bitoffset);
1565 int shift = shifts[k] + offset;
1566 String shifted = (shift == 0) ? var : "(" + var + ">>" + shift + ")";
1567 int mask = (1 << (sizes[k] - offset)) - 1;
1568 String masked = (k == 0) ? shifted :
1569 "(" + shifted + "&0x" + hex(mask) + ")";
1570 String index = (k == 0) ? masked :
1571 (mask == 0) ? access : "(" + access + "|" + masked + ")";
1572 String indexNoParens = (index.charAt(0) != '(') ? index :
1573 index.substring(1, index.length() - 1);
1574 String tblname = (k == sizes.length - 1) ? tbl : tableName(k);
1575 String fetched = tblname + "[" + indexNoParens + "]";
1576 String zeroextended = (zeroextend[k] == 0) ? fetched :
1577 "(" + fetched + "&0x" + hex(zeroextend[k]) + ")";
1578 int adjustment = preshifted[k] ? 0 :
1579 sizes[k+1] - ((k == sizes.length - 2) ? bitoffset : 0);
1580 String adjusted = (preshifted[k] || adjustment == 0) ? zeroextended :
1581 "(" + zeroextended + "<<" + adjustment + ")";
1582 String bitshift = (bits == 1) ? "(" + var + "&0x1F)" :
1583 (bits == 2) ? "((" + var + "&0xF)<<1)" :
1584 (bits == 4) ? "((" + var + "&7)<<2)" : null;
1585 String extracted = ((k < sizes.length - 1) || (bits >= 8)) ? adjusted :
1586 "((" + adjusted + ">>" + bitshift + ")&" +
1587 (bits == 4 ? "0xF" : "" + ((1 << bits) - 1)) + ")";
1588 access = extracted;
1589 }
1590 return access;
1591 }
1592
1593
1594
1595
1596
// Command line options; all of these are assigned by processArgs.

// -verbose / -v: print progress information.
static boolean verbose = false;
// -nobidi
static boolean nobidi = false;
// -nomirror
static boolean nomirror = false;
// -identifiers: selects the default sizes { 8, 4, 4 } and 2-bit entries
// in the access expression.
static boolean identifiers = false;
// -c: generate C rather than Java syntax.
static boolean Csyntax = false;
// -template <file>; defaults to the C or Java template file name.
static String TemplateFileName = null;
// -o <file>; defaults to the C or Java output file name.
static String OutputFileName = null;
// -spec <file>
static String UnicodeSpecFileName = null;
// -specialcasing <file>
static String SpecialCasingFileName = null;
// -proplist <file>
static String PropListFileName = null;
// -usecharforbyte: 8-bit tables emitted as strings use one char per entry.
static boolean useCharForByte = false;
// Bit field widths, one per table level: taken from the numeric command
// line arguments, the defaults, or filled in by searchBins.
static int[] sizes;
// -search <n>: number of bins to search over; 0 means no search.
static int bins = 0;
// -string: emit tables as string constants (not allowed together with -c).
static boolean tableAsString = false;
// -latin1; cleared again by a -plane argument greater than 0.
static boolean bLatin1 = false;

// The effective command line, including the defaults that were applied.
static String commandLineDescription;

// Per-level layout information computed by generateForSizes.

// Right shift that brings each level's bit field down to bit 0.
static int[] shifts;
// Mask used to zero-extend a fetched index, or 0 if none is needed.
static int[] zeroextend;
// Bytes needed per index entry of each level (1, 2 or 4; 0 for the last level).
static int[] bytes;
// Whether the indices stored at each level are already shifted.
static boolean[] preshifted;
// The tables themselves, one per level.
static long[][] tables;

// Comment delimiters of the output language, set by processArgs.

static String commentStart;
static String commentEnd;

// Accumulates the initializer code produced by addInitializer.
static StringBuffer initializers = new StringBuffer();

// Special case mappings; assigned in main from SpecialCaseMap.readSpecFile.
static SpecialCaseMap[] specialCaseMaps;
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669 static void processArgs(String[] args) {
1670 StringBuffer desc = new StringBuffer("java GenerateCharacter");
1671 for (int j=0; j<args.length; ++j) {
1672 desc.append(" " + args[j]);
1673 }
1674 for (int j = 0; j < args.length; j++) {
1675 if (args[j].equals("-verbose") || args[j].equals("-v"))
1676 verbose = true;
1677 else if (args[j].equals("-nobidi"))
1678 nobidi = true;
1679 else if (args[j].equals("-nomirror"))
1680 nomirror = true;
1681 else if (args[j].equals("-identifiers"))
1682 identifiers = true;
1683 else if (args[j].equals("-c"))
1684 Csyntax = true;
1685 else if (args[j].equals("-string"))
1686 tableAsString = true;
1687 else if (args[j].equals("-o")) {
1688 if (j == args.length - 1) {
1689 FAIL("File name missing after -o");
1690 }
1691 else {
1692 OutputFileName = args[++j];
1693 }
1694 }
1695 else if (args[j].equals("-search")) {
1696 if (j == args.length - 1)
1697 FAIL("Bin count missing after -search");
1698 else {
1699 bins = Integer.parseInt(args[++j]);
1700 if (bins < 1 || bins > 10)
1701 FAIL("Bin count must be >= 1 and <= 10");
1702 }
1703 }
1704 else if (args[j].equals("-template")) {
1705 if (j == args.length - 1)
1706 FAIL("File name missing after -template");
1707 else
1708 TemplateFileName = args[++j];
1709 }
1710 else if (args[j].equals("-spec")) {
1711 if (j == args.length - 1) {
1712 FAIL("File name missing after -spec");
1713 }
1714 else {
1715 UnicodeSpecFileName = args[++j];
1716 }
1717 }
1718 else if (args[j].equals("-specialcasing")) {
1719 if (j == args.length -1) {
1720 FAIL("File name missing after -specialcasing");
1721 }
1722 else {
1723 SpecialCasingFileName = args[++j];
1724 }
1725 }
1726 else if (args[j].equals("-proplist")) {
1727 if (j == args.length -1) {
1728 FAIL("File name missing after -proplist");
1729 }
1730 else {
1731 PropListFileName = args[++j];
1732 }
1733 }
1734 else if (args[j].equals("-plane")) {
1735 if (j == args.length -1) {
1736 FAIL("Plane number missing after -plane");
1737 }
1738 else {
1739 plane = Integer.parseInt(args[++j]);
1740 }
1741 if (plane > 0) {
1742 bLatin1 = false;
1743 }
1744 }
1745 else if ("-usecharforbyte".equals(args[j])) {
1746 useCharForByte = true;
1747 }
1748 else if (args[j].equals("-latin1")) {
1749 bLatin1 = true;
1750 plane = 0;
1751 }
1752 else {
1753 try {
1754 int val = Integer.parseInt(args[j]);
1755 if (val < 0 || val > 32) FAIL("Incorrect bit field width: " + args[j]);
1756 if (sizes == null)
1757 sizes = new int[1];
1758 else {
1759 int[] newsizes = new int[sizes.length + 1];
1760 System.arraycopy(sizes, 0, newsizes, 0, sizes.length);
1761 sizes = newsizes;
1762 }
1763 sizes[sizes.length - 1] = val;
1764 }
1765 catch(NumberFormatException e) {
1766 FAIL("Unknown switch: " + args[j]);
1767 }
1768 }
1769 }
1770 if (Csyntax && tableAsString) {
1771 FAIL("Can't specify table as string with C syntax");
1772 }
1773 if (sizes == null) {
1774 desc.append(" [");
1775 if (identifiers) {
1776 int[] newsizes = { 8, 4, 4 };
1777 desc.append("8 4 4]");
1778 sizes = newsizes;
1779 }
1780 else {
1781 int[] newsizes = { 10, 5, 1 };
1782 desc.append("10 5 1]");
1783 sizes = newsizes;
1784 }
1785 }
1786 if (UnicodeSpecFileName == null) {
1787 UnicodeSpecFileName = DefaultUnicodeSpecFileName;
1788 desc.append(" [-spec " + UnicodeSpecFileName + ']');
1789 }
1790 if (SpecialCasingFileName == null) {
1791 SpecialCasingFileName = DefaultSpecialCasingFileName;
1792 desc.append(" [-specialcasing " + SpecialCasingFileName + ']');
1793 }
1794 if (PropListFileName == null) {
1795 PropListFileName = DefaultPropListFileName;
1796 desc.append(" [-proplist " + PropListFileName + ']');
1797 }
1798 if (TemplateFileName == null) {
1799 TemplateFileName = (Csyntax ? DefaultCTemplateFileName
1800 : DefaultJavaTemplateFileName);
1801 desc.append(" [-template " + TemplateFileName + ']');
1802 }
1803 if (OutputFileName == null) {
1804 OutputFileName = (Csyntax ? DefaultCOutputFileName
1805 : DefaultJavaOutputFileName);
1806 desc.append(" [-o " + OutputFileName + ']');
1807 }
1808 commentStart = (Csyntax ? "/*" : "//");
1809 commentEnd = (Csyntax ? " */" : "");
1810 commandLineDescription = desc.toString();
1811 }
1812
1813 private static void searchBins(long[] map, int binsOccupied) throws Exception {
1814 int bitsFree = 16;
1815 for (int i=0; i<binsOccupied; ++i) bitsFree -= sizes[i];
1816 if (binsOccupied == (bins-1)) {
1817 sizes[binsOccupied] = bitsFree;
1818 generateForSizes(map);
1819 }
1820 else {
1821 for (int i=1; i<bitsFree; ++i) {
1822 sizes[binsOccupied] = i;
1823 searchBins(map, binsOccupied+1);
1824 }
1825 }
1826 }
1827
/**
 * Builds the multi-level tables for the bit field widths currently in
 * {@code sizes} and fills in the per-level layout arrays (shifts, tables,
 * preshifted, zeroextend, bytes).  In search mode (bins > 0) it then only
 * prints a one-line summary of the layout and returns; otherwise it
 * generates the output file via generateCharacterClass.
 *
 * @param map the full map that becomes the last-level table
 * @throws Exception propagated from the table or source generation
 */
private static void generateForSizes(long[] map) throws Exception {
    // shifts[k] is the total width of all fields after field k.
    int sum = 0;
    shifts = new int[sizes.length];
    for (int k = sizes.length - 1; k >= 0; k--) {
        shifts[k] = sum;
        sum += sizes[k];
    }
    // The widths must add up to the smallest bit count that covers the
    // map: 2^(sum-1) < map.length <= 2^sum.
    if ((1 << sum) < map.length || (1 << (sum - 1)) >= map.length) {
        FAIL("Bit field widths total to " + sum +
             ": wrong total for map of size " + map.length);
    }

    tables = new long[sizes.length][];

    // Start with the whole map as the last level and split from the bottom
    // up: each buildTable call yields the index table for the level above
    // (element 0) and the replacement table for this level (element 1).
    tables[sizes.length - 1] = map;
    for (int j = sizes.length - 1; j > 0; j--) {
        if (verbose && bins==0)
            System.err.println("Building map " + (j+1) + " of bit width " + sizes[j]);
        long[][] temp = buildTable(tables[j], sizes[j]);
        tables[j-1] = temp[0];
        tables[j] = temp[1];
    }
    preshifted = new boolean[sizes.length];
    zeroextend = new int[sizes.length];
    bytes = new int[sizes.length];
    for (int j = 0; j < sizes.length - 1; j++) {
        // len is the range of index values level j must be able to hold.
        int len = tables[j+1].length;
        int size = sizes[j+1];
        // If the indices only fit in a byte (or a 16-bit value) when stored
        // without the low 'size' bits, store them unshifted.
        if (len > 0x100 && (len >> size) <= 0x100) {
            len >>= size;
            preshifted[j] = false;
        }
        else if (len > 0x10000 && (len >> size) <= 0x10000) {
            len >>= size;
            preshifted[j] = false;
        }
        else preshifted[j] = true;
        // Java output only: index ranges that exceed the signed maximum of
        // the storage width get a zero-extension mask.  With useCharForByte
        // the byte-range case keeps the default mask of 0.
        if (Csyntax)
            zeroextend[j] = 0;
        else if (len > 0x7F && len <= 0xFF) {
            if (!useCharForByte) {
                zeroextend[j] = 0xFF;
            }
        } else if (len > 0x7FFF && len <= 0xFFFF)
            zeroextend[j] = 0xFFFF;
        else zeroextend[j] = 0;
        if (len <= 0x100) bytes[j] = 1;
        else if (len <= 0x10000) bytes[j] = 2;
        else bytes[j] = 4;
    }
    // The last level holds data rather than indices.
    preshifted[sizes.length - 1] = true;
    zeroextend[sizes.length - 1] = 0;
    bytes[sizes.length - 1] = 0;
    if (bins > 0) {
        // Search mode: report sizes, total bytes, an operator count for the
        // access expression, and the expression itself.
        int totalBytes = getTotalBytes();
        String access = genAccess("A", "ch", (identifiers ? 2 : 32));
        int accessComplexity = 0;
        for (int j=0; j<access.length(); ++j) {
            char ch = access.charAt(j);
            if ("[&|><".indexOf(ch) >= 0) ++accessComplexity;
            // Skip the second character of "<<" / ">>" so that a shift
            // counts once.
            if (ch == '<' || ch == '>') ++j;
        }
        System.out.print("(");
        for (int j=0; j<sizes.length; ++j) System.out.print(" " + sizes[j]);
        System.out.println(" ) " + totalBytes + " " + accessComplexity + " " + access);
        return;
    }
    if (verbose) {
        System.out.println(" n\t size\tlength\tshift\tzeroext\tbytes\tpreshifted");
        for (int j = 0; j < sizes.length; j++) {
            System.out.println(dec5(j) + "\t" +
                               dec5(sizes[j]) + "\t" +
                               dec5(tables[j].length) + "\t" +
                               dec5(shifts[j]) + "\t" +
                               dec5(zeroextend[j]) + "\t" +
                               dec5(bytes[j]) + "\t " +
                               preshifted[j]);
        }
    }
    if (verbose) {
        System.out.println("Generating source code for class Character");
        System.out.println("A table access looks like " +
                           genAccess("A", "ch", (identifiers ? 2 : 32)));
    }
    generateCharacterClass(TemplateFileName, OutputFileName);
}
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942 public static void main(String[] args) {
1943 processArgs(args);
1944 try {
1945
1946 UnicodeSpec[] data = UnicodeSpec.readSpecFile(new File(UnicodeSpecFileName), plane);
1947 specialCaseMaps = SpecialCaseMap.readSpecFile(new File(SpecialCasingFileName), plane);
1948 PropList propList = PropList.readSpecFile(new File(PropListFileName), plane);
1949
1950 if (verbose) {
1951 System.out.println(data.length + " items read from Unicode spec file " + UnicodeSpecFileName);
1952 }
1953 long[] map = buildMap(data, specialCaseMaps, propList);
1954 if (verbose) {
1955 System.err.println("Completed building of initial map");
1956 }
1957
1958 if (bins == 0) {
1959 generateForSizes(map);
1960 }
1961 else {
1962 while (bins > 0) {
1963 sizes = new int[bins];
1964 searchBins(map, 0);
1965 --bins;
1966 }
1967 }
1968 if (verbose && false) {
1969 System.out.println("Offset range seen: -" + hex8(-minOffsetSeen) + "..+" +
1970 hex8(maxOffsetSeen));
1971 System.out.println(" allowed: -" + hex8(-minOffset) + "..+" +
1972 hex8(maxOffset));
1973 }
1974 }
1975 catch (FileNotFoundException e) { FAIL(e.toString()); }
1976 catch (IOException e) { FAIL(e.toString()); }
1977 catch (Throwable e) {
1978 System.out.println("Unexpected exception:");
1979 e.printStackTrace();
1980 FAIL("Unexpected exception!");
1981 }
1982 if (verbose) { System.out.println("Done!");}
1983 }
1984
1985 }